library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(RsSimulx)
## R Speaks Simulx!
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(viridis)
## Loading required package: viridisLite
DIR_DERIVED = "../DerivedData"
DERIVED_COMPLETE_CC = paste( DIR_DERIVED, "TFPerception_deterministic.csv", sep="/")
DERIVED_COMPLETE_Y = paste( DIR_DERIVED, "TFPerception_stochastic.csv", sep="/")
DERIVED_TRAIN_CC = paste( DIR_DERIVED, "TFPerception_deterministic_train.csv", sep="/")
DERIVED_TRAIN_Y = paste( DIR_DERIVED, "TFPerception_stochastic_train.csv", sep="/")
study_t <- c(seq(0,4,0.5), 5:12, 16, 24) # sampling points
study_n <- 1000 # sims per group
model.1cmt.iv <- inlineModel("
[LONGITUDINAL]
input = {V, Cl, adde}
EQUATION:
Cc = pkmodel(V,Cl)
DEFINITION:
y = {distribution=lognormal, prediction=Cc, sd=adde}
[INDIVIDUAL]
input = {V_min, V_max, Cl_min, Cl_max }
DEFINITION:
V = {distribution=uniform, min=V_min, max=V_max}
Cl = {distribution=uniform, min=Cl_min, max=Cl_max}")
adm <- list(time=0, amount=1000) # list(time=seq(0,66,by=12), amount=100)
y <- list(name="y", time=study_t) #seq(18, 80, by=6))
Cc <- list(name="Cc", time=study_t ) #seq(0,100, by=0.5))
V <- list(name="V")
p <- c(V_min=1, V_max=200, Cl_min=0.1, Cl_max=20, adde=0.2)
p <- c(V_min=5, V_max=200, Cl_min=1, Cl_max=15, adde=0.2)
g <- list(size=study_n*3) # oversample because we'll remove extreme profiles
model.1cmt.iv.res <- simulx(model = model.1cmt.iv,
output = list(Cc,y),
parameter = p,
treatment = adm,
group = g,
settings = list(seed=123456))
## [INFO] The directory specified in the initialization file of the Lixoft Suite (located at "/Users/lix/lixoft/lixoft.ini") will be used by default: "/Applications/MonolixSuite2020R1.app/Contents/Resources/monolixSuite"
## [INFO] The lixoftConnectors package has been successfully initialized:
## lixoftConnectors package version -> 2020.1
## Lixoft softwares suite version -> 2020R1
hist( model.1cmt.iv.res$parameter$V)
hist( model.1cmt.iv.res$parameter$Cl)
ggplot( model.1cmt.iv.res$Cc, aes(time, Cc, group=id)) +
geom_line( alpha=0.2)
prctilemlx(model.1cmt.iv.res$Cc, number=9, level=90, color="#4682b4" )
remove uninformative profiles
#TODO: clean and combine conditions
df.no_elimination <- model.1cmt.iv.res$Cc %>%
group_by(id) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc))) %>%
filter( time == 2, Cc_std > 0.1) %>%
ungroup()
dim(df.no_elimination)
## [1] 2966 4
df.not1CMT <- model.1cmt.iv.res$Cc %>%
group_by(id) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc))) %>%
filter( time == max(time), Cc_std < 0.2) %>%
ungroup()
dim(df.not1CMT)
## [1] 3000 4
matched <- sample( intersect( df.no_elimination$id, df.not1CMT$id),
size = study_n,
replace = F )
model.1cmt.iv.res$Cc %>%
group_by(id) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc))) %>%
ungroup() %>%
ggplot(., aes(time, Cc_std, group=id)) +
geom_line(alpha=0.05) +
labs( title="Before")
model.1cmt.iv.res$Cc %<>%
filter( id %in% matched )
model.1cmt.iv.res$y %<>%
filter( id %in% matched )
model.1cmt.iv.res$Cc %>%
group_by(id) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc))) %>%
ungroup() %>%
ggplot(., aes(time, Cc_std, group=id)) +
geom_line(alpha=0.05) +
labs( title="After")
remove uninformative profiles
df <- model.1cmt.iv.res$Cc %>%
group_by(id) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc))) %>%
filter( time == max(time), Cc_std < 0.2) %>%
ungroup() %>%
arrange( id, time )%>%
sample_n( size=study_n, replace = F)
model.1cmt.iv.res$Cc %<>%
filter( id %in% df$id )
model.1cmt.iv.res$y %<>%
filter( id %in% df$id )
model.1cmt.iv.res$Cc %>%
group_by(id) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc))) %>%
ungroup() %>%
ggplot(., aes(time, Cc_std, group=id)) +
geom_line(alpha=0.05)
scale_y_log10()
model.1cmt.oa <- inlineModel("
[LONGITUDINAL]
input = {V, Cl, ka, adde}
EQUATION:
Cc = pkmodel(V,Cl,ka)
DEFINITION:
y = {distribution=lognormal, prediction=Cc, sd=adde}
[INDIVIDUAL]
input = {V_min, V_max, Cl_min, Cl_max, ka_min, ka_max }
DEFINITION:
V = {distribution=uniform, min=V_min, max=V_max}
Cl = {distribution=uniform, min=Cl_min, max=Cl_max}
ka = {distribution=uniform, min=ka_min, max=ka_max}" )
adm <- list(time=0, amount=1000) # list(time=seq(0,66,by=12), amount=100)
y <- list(name="y", time=study_t) #seq(18, 80, by=6))
Cc <- list(name="Cc", time=study_t ) #seq(0,100, by=0.5))
V <- list(name="V")
Cl <- list(name="Cl")
ka <- list(name="ka")
g <- list(size=study_n*2) # oversample, to remove uninformative profiles
p <- c(V_min=1, V_max=100, Cl_min=0.1, Cl_max=20, ka_min=0.1, ka_max=0.7, adde=0.2)
model.1cmt.oa.res <- simulx(model = model.1cmt.oa,
output = list(Cc,y, V, Cl, ka),
parameter = p,
treatment = adm,
group = g,
settings = list(seed=42))
## Warning: Mandatory fields are missing in 'output 3', 'time' is not defined.
## Skip 'output 3'. If it is a parameter, note that simulx function now returns all parameters by default.
## Warning: Mandatory fields are missing in 'output 4', 'time' is not defined.
## Skip 'output 4'. If it is a parameter, note that simulx function now returns all parameters by default.
## Warning: Mandatory fields are missing in 'output 5', 'time' is not defined.
## Skip 'output 5'. If it is a parameter, note that simulx function now returns all parameters by default.
ggplot( model.1cmt.oa.res$Cc, aes(time, Cc, group=id)) +
geom_line( alpha=0.2)
prctilemlx(model.1cmt.oa.res$Cc, number=9, level=90, color="#4682b4" )
Remove untypical profiles (less than 20% Cend/Cmax)
df <- model.1cmt.oa.res$Cc %>%
group_by(id) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc))) %>%
filter( time == max(time), Cc_std < 0.2) %>%
ungroup() %>%
arrange( id, time )%>%
sample_n( size=study_n, replace = F)
model.1cmt.oa.res$Cc %<>%
filter( id %in% df$id )
model.1cmt.oa.res$y %<>%
filter( id %in% df$id )
model.1cmt.oa.res$Cc %>%
group_by(id) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc))) %>%
ungroup() %>%
ggplot(., aes(time, Cc_std, group=id)) +
geom_line(alpha=0.05)
scale_y_log10()
## <ScaleContinuousPosition>
## Range:
## Limits: 0 -- 1
model.2cmt.iv <- inlineModel("
[LONGITUDINAL]
input = {V, Cl, Q, V2, adde}
EQUATION:
Cc = pkmodel(V, k=Cl/V, k12=Q/V, k21=Q/V2)
DEFINITION:
y = {distribution=lognormal, prediction=Cc, sd=adde}
[INDIVIDUAL]
input = {V_min, V_max, Cl_min, Cl_max, V2_min, V2_max, Q_min, Q_max }
DEFINITION:
V = {distribution=uniform, min=V_min, max=V_max}
Cl = {distribution=uniform, min=Cl_min, max=Cl_max}
V2 = {distribution=uniform, min=V2_min, max=V2_max}
Q = {distribution=uniform, min=Q_min, max=Q_max}")
adm <- list(time=0, amount=1000) # list(time=seq(0,66,by=12), amount=100)
y <- list(name="y", time=study_t) #seq(18, 80, by=6))
Cc <- list(name="Cc", time=study_t ) #seq(0,100, by=0.5))
V <- list(name="V")
p <- c(V_min=10, V_max=100, Cl_min=2, Cl_max=10,
V2_min=10, V2_max=1000, Q_min=1, Q_max=20, adde=0.2)
g <- list(size=study_n*2)
model.2cmt.iv.res <- simulx(model = model.2cmt.iv,
output = list(Cc,y),
parameter = p,
treatment = adm,
group = g,
settings = list(seed=123456))
ggplot( model.2cmt.iv.res$Cc, aes(time, Cc, group=id)) +
geom_line( alpha=0.2)
prctilemlx(model.2cmt.iv.res$Cc, number=9, level=90, color="#4682b4" )
remove uninformative profiles
#TODO: clean and combine conditions
df.no_elimination <- model.2cmt.iv.res$Cc %>%
group_by(id) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc))) %>%
filter( time == 2, Cc_std > 0.2) %>%
ungroup()
dim(df.no_elimination)
## [1] 1797 4
df.not2CMT <- model.2cmt.iv.res$Cc %>%
group_by(id) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc))) %>%
filter( time == 6, Cc_std < 0.4) %>%
ungroup()
matched <- sample( intersect( df.no_elimination$id, df.not2CMT$id),
size = study_n,
replace = F )
model.2cmt.iv.res$Cc %>%
group_by(id) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc))) %>%
ungroup() %>%
ggplot(., aes(time, Cc_std, group=id)) +
geom_line(alpha=0.05) +
labs( title="Before")
model.2cmt.iv.res$Cc %<>%
filter( id %in% matched )
model.2cmt.iv.res$y %<>%
filter( id %in% matched )
model.2cmt.iv.res$Cc %>%
group_by(id) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc))) %>%
ungroup() %>%
ggplot(., aes(time, Cc_std, group=id)) +
geom_line(alpha=0.05) +
labs( title="After")
model.2cmt.oa <- inlineModel("
[LONGITUDINAL]
input = {V, Cl, Q, V2, ka, adde}
EQUATION:
Cc = pkmodel(V, k=Cl/V, k12=Q/V, k21=Q/V2, ka)
DEFINITION:
y = {distribution=lognormal, prediction=Cc, sd=adde}
[INDIVIDUAL]
input = {V_min, V_max, Cl_min, Cl_max, V2_min, V2_max, Q_min, Q_max, ka_min, ka_max }
DEFINITION:
V = {distribution=uniform, min=V_min, max=V_max}
Cl = {distribution=uniform, min=Cl_min, max=Cl_max}
V2 = {distribution=uniform, min=V2_min, max=V2_max}
Q = {distribution=uniform, min=Q_min, max=Q_max}
ka = {distribution=uniform, min=ka_min, max=ka_max}")
adm <- list(time=0, amount=1000) # list(time=seq(0,66,by=12), amount=100)
y <- list(name="y", time=study_t) #seq(18, 80, by=6))
Cc <- list(name="Cc", time=study_t ) #seq(0,100, by=0.5))
V <- list(name="V")
p <- c(V_min=1, V_max=100, Cl_min=0.1, Cl_max=50,
V2_min=10, V2_max=1000, Q_min=1, Q_max=50,
ka_min=0.1, ka_max=0.7, adde=0.2)
g <- list(size=study_n*2)
model.2cmt.oa.res <- simulx(model = model.2cmt.oa,
output = list(Cc,y),
parameter = p,
treatment = adm,
group = g,
settings = list(seed=123456))
df <- model.2cmt.oa.res$Cc %>%
group_by(id) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc))) %>%
filter( time == max(time), Cc_std < 0.2) %>%
ungroup() %>%
arrange( id, time )%>%
sample_n( size=study_n, replace = F)
model.2cmt.oa.res$Cc %<>%
filter( id %in% df$id )
model.2cmt.oa.res$y %<>%
filter( id %in% df$id )
ggplot( model.2cmt.oa.res$Cc, aes(time, Cc, group=id)) +
geom_line( alpha=0.2)
prctilemlx(model.2cmt.oa.res$Cc, number=9, level=90, color="#4682b4" )
# Datasets Generating the complete training datasets
Deterministic sims
df.Cc <- rbind(
model.1cmt.iv.res$Cc %>%
mutate( outcome = "1CMT_IV" ),
model.2cmt.iv.res$Cc %>%
mutate( outcome = "2CMT_IV" ),
model.1cmt.oa.res$Cc %>%
mutate( outcome = "1CMT_OA" ),
model.2cmt.oa.res$Cc %>%
mutate( outcome = "2CMT_OA" ) )
ggplot( df.Cc, aes(time, Cc, group=interaction(id, outcome)) ) +
geom_line( aes(color=outcome), alpha=0.2) +
labs( title="Training data (deterministic)",
x="Time",
y="Concentration",
color="Type") +
theme_minimal()
df.Cc %>%
select( id, time, DV=Cc, outcome) %>%
write.csv( ., DERIVED_COMPLETE_CC, row.names = F)
Stochastic sims
df.y <- rbind(
model.1cmt.iv.res$y %>%
mutate( outcome = "1CMT_IV" ),
model.2cmt.iv.res$y %>%
mutate( outcome = "2CMT_IV" ),
model.1cmt.oa.res$y %>%
mutate( outcome = "1CMT_OA" ),
model.2cmt.oa.res$y %>%
mutate( outcome = "2CMT_OA" ) )
ggplot( df.y, aes(time, y, group=interaction(id, outcome)) ) +
geom_line( aes(color=outcome), alpha=0.2) +
labs( title="Training data (stochastic)",
x="Time",
y="Concentration",
color="Type") +
theme_minimal()
df.y %>%
select( id, time, DV=y, outcome) %>%
write.csv( ., DERIVED_COMPLETE_Y, row.names = F)
study_timepoints <- c(0, 1, 2, 4, 6, 8, 12, 24 )
df.Cc %<>%
group_by(id, outcome) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc)),
id = as.numeric(levels(id))[id] )
ggplot( df.Cc, aes(time, Cc_std, color=id, group=id)) +
geom_line( alpha=0.05) +
scale_color_viridis( option="inferno" )+
theme_minimal() +
theme( legend.position = "none") +
facet_wrap( .~outcome) +
labs( title="Training Data",
subtitle=paste( "Simulations per model:", study_n),
y="")
Select sampling time points, pivot from long to wide, and store. Re-run standardization on this subset so as to prevent cross-over.
df <- df.Cc %>%
filter( time %in% study_timepoints) %>%
group_by( id, outcome ) %>%
mutate( Cc_std = (Cc- min(Cc)) / (max(Cc) - min(Cc)) ) %>%
select( id, outcome, Cc_std, time ) %>%
pivot_wider( names_from = "time", names_prefix="t", values_from = "Cc_std")
write.csv( df, DERIVED_TRAIN_CC, quote = F, row.names = F)
Same thing for the y data
df.y %<>%
group_by(id, outcome) %>%
mutate( y_std = (y- min(y)) / (max(y) - min(y)),
id = as.numeric(levels(id))[id] )
ggplot( df.y, aes(time, y_std, color=id, group=id)) +
geom_line( alpha=0.05) +
scale_color_viridis( option="inferno" )+
theme_minimal() +
theme( legend.position = "none") +
facet_wrap( .~outcome) +
labs( title="Training Data (stochastic)",
subtitle=paste( "Simulations per model:", study_n),
y="")
And selecting our time points
df <- df.y %>%
filter( time %in% study_timepoints ) %>%
group_by( id, outcome ) %>%
mutate( y_std = (y- min(y)) / (max(y) - min(y)) ) %>%
select( id, outcome, y_std, time ) %>%
pivot_wider( names_from = "time", names_prefix="t", values_from = "y_std")
write.csv( df, DERIVED_TRAIN_Y, quote = F, row.names = F)